hiddenmeta workflow

Install and load packages

install.packages("DeclareDesign")
install.packages("igraph")

devtools::install_github("gerasy1987/hiddenmeta", build_vignettes = TRUE)
library(hiddenmeta)
library(DeclareDesign)
library(igraph)
library(knitr)

Step 1. Provide study design features

## STUDY 1
study_1 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 1000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = 0,
               p_edge_within = list(known = c(0.1, 0.3), hidden = c(0.1, 0.3)),
               p_edge_between = list(known = 0.1, hidden = 0.1),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = 1),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25)",
         "purrr::map_df(hidden, ~ sapply( `names<-`(rep(0, times = 10), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
         known_2 = 0.3, known_3 = 0.3)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   add_seeds = 3,
                   target_type = "sample",
                   target_n_rds = 60),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_n_tls = 180,
                   cluster = paste0("loc_", 1:10)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 200)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 100,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 1000,
                            rds_prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds"),
                             capture_parse = 
                              "strsplit(x = unique(na.omit(tls_locs_sampled)), split = ';')[[1]]",
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

Step 2. Declare study population

study_population <-
  eval(as.call(c(list(declare_population), study_1$pop)))

set.seed(872312)
example_pop <- study_population()

example_pop %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))

Show the network

g <-
  example_pop %$% {
    hiddenmeta:::retrieve_graph(links) %>%
      igraph::set_vertex_attr("name", value = name) %>%
      igraph::set_vertex_attr("type", value = type)
  }

igraph::V(g)$color <-
  plyr::mapvalues(igraph::V(g)$type,
                  from = unique(igraph::V(g)$type),
                  to = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), 
                                                 palette = "Set 3"))

plot(g,
     # layout = igraph::layout_on_grid(g, dim = 2, width = 100),
     layout = igraph::layout_on_grid(g, dim = 2, width = 150),
     vertex.size = 1.5, vertex.dist = 4, vertex.label = NA, edge.width = .2,
     edge.arrow.size = .2, edge.curved = .2)

legend(x = -1, y = -1.2,
       legend = c("none", "known only", "hidden only", "both"),
       pt.bg = grDevices::palette.colors(n = length(unique(igraph::V(g)$type)), palette = "Set 3"),
       pch = 21, col = "#777777", pt.cex = 1, cex = 1, bty = "o", ncol = 2)

Step 3. Declare all relevant study sampling procedures

The sampling procedures are additive in a sense that each procedure appends several columns relevant to the sampling procedure and particular draw based on population simulation, but does not change the study population data frame (unless you specify drop_nonsampled = TRUE).

study_sample_rds <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$rds)))

set.seed(872312)
draw_data(study_population + study_sample_rds) %>%  
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))
study_sample_pps <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$pps)))

set.seed(872312)
draw_data(study_population + study_sample_rds + study_sample_pps) %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))
study_sample_tls <- 
  eval(as.call(c(list(declare_sampling), study_1$sample$tls)))

set.seed(872312)
draw_data(study_population + 
            study_sample_rds + study_sample_pps + study_sample_tls) %>% 
  dplyr::sample_n(n()) %>% # shuffle data frame
  rmarkdown::paged_table(options = list(rows.print = 15))

Step 4. Declare study level estimands

study_estimands <- 
  eval(as.call(c(list(declare_inquiry), study_1$inquiries)))

set.seed(872312)
draw_estimands(study_population + 
                 # study_sample_rds + study_sample_pps + study_sample_tls + 
                 study_estimands) %>% 
  kable(digits = 2)
inquiry estimand
known_size 318.00
hidden_size 100.00
known_prev 0.32
hidden_prev 0.10
hidden_size_in_known 38.00
hidden_prev_in_known 0.12

Step 5. Declare estimators used in the study

est_sspse <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$sspse)))
est_chords <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$chords)))
est_multi <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$rds$multiplier)))

est_ht_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$ht)))
est_nsum_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$pps$nsum)))

est_ht_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$ht)))
est_nsum_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$nsum)))
est_recap_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$tls$recap)))

est_recap_rds_pps <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap1)))
est_recap_rds_tls <- 
  eval(as.call(c(list(declare_estimator), study_1$estimators$all$recap2)))

set.seed(872312)
draw_estimates(study_population +
                 study_sample_rds + study_sample_pps + study_sample_tls +
                 study_estimands +
                 est_sspse + est_chords + est_multi +
                 est_nsum_pps + est_ht_pps +
                 est_nsum_tls + est_ht_tls + est_recap_tls +
                 est_recap_rds_pps + est_recap_rds_tls) %>%
  kable(digits = 2)
estimator estimate se inquiry
hidden_size_rds_sspse 104.00 26.30 hidden_size
hidden_size_rds_chords 37.00 24.05 hidden_size
hidden_size_rds_multi 104.00 29.33 hidden_size
hidden_size_pps_nsum 101.82 6.32 hidden_size
hidden_size_pps_ht 85.00 19.85 hidden_size
hidden_prev_pps_ht 0.09 0.02 hidden_prev
hidden_size_tls_nsum 106.11 2.37 hidden_size
hidden_size_tls_ht 4.03 0.85 hidden_size
hidden_prev_tls_ht 0.00 0.00 hidden_prev
hidden_size_tls_recap 51.07 21.46 hidden_size
hidden_size_rds_pps_recap 98.82 16.27 hidden_size
hidden_size_rds_tls_recap 98.83 9.77 hidden_size

Step 6. Declare full study

study1 <- 
  study_population +
  study_sample_rds + study_sample_pps + study_sample_tls +
  study_estimands +
  est_sspse + est_chords + est_multi +
  est_nsum_pps + est_ht_pps +
  est_nsum_tls + est_ht_tls + est_recap_tls +
  est_recap_rds_pps + est_recap_rds_tls

# can draw one of the samples
study1$study_sample_rds(study1$study_population())
#> # A tibble: 1,000 × 84
#>     name type  known hidden links      service_use loc_1 loc_2 loc_3 loc_4 loc_5
#>    <int> <chr> <int>  <int> <chr>            <int> <int> <int> <int> <int> <int>
#>  1     1 00        0      0 11;136;41…           0     0     0     0     0     1
#>  2     2 00        0      0 88;181;24…           0     0     0     1     0     0
#>  3     3 00        0      0 41;82;260…           1     0     0     0     0     0
#>  4     4 00        0      0 24;212;61…           0     0     0     0     0     0
#>  5     5 00        0      0 43;69;72;…           0     1     0     0     0     0
#>  6     6 00        0      0 53;561;60…           1     0     0     0     1     0
#>  7     7 00        0      0 101;177;2…           0     0     0     0     0     0
#>  8     8 00        0      0 11;157;33…           0     0     0     0     0     1
#>  9     9 00        0      0 14;311;35…           0     0     0     0     0     0
#> 10    10 00        0      0 30;161;21…           0     0     0     0     0     1
#> # … with 990 more rows, and 73 more variables: loc_6 <int>, loc_7 <int>,
#> #   loc_8 <int>, loc_9 <int>, loc_10 <int>, known_2 <int>, known_3 <int>,
#> #   n_visible_out <dbl>, known_visible_out <dbl>, hidden_visible_out <dbl>,
#> #   type_00_visible_out <dbl>, type_01_visible_out <dbl>,
#> #   type_10_visible_out <dbl>, type_11_visible_out <dbl>,
#> #   service_use_visible_out <dbl>, loc_1_visible_out <dbl>,
#> #   loc_2_visible_out <dbl>, loc_3_visible_out <dbl>, …
# can draw full data
study1_data <- draw_data(study1)

# can draw estimands
study1$inquiry(study1_data)
#>                inquiry     estimand
#> 1           known_size 313.00000000
#> 2          hidden_size  95.00000000
#> 3           known_prev   0.31300000
#> 4          hidden_prev   0.09500000
#> 5 hidden_size_in_known  22.00000000
#> 6 hidden_prev_in_known   0.07028754
# can draw any of the estimators
study1$rds_sspse(study1_data)
#>               estimator estimate       se     inquiry
#> 1 hidden_size_rds_sspse    150.5 92.57726 hidden_size
study1$pps_ht(study1_data)
#>            estimator estimate          se     inquiry
#> 1 hidden_size_pps_ht   110.00 23.66712443 hidden_size
#> 2 hidden_prev_pps_ht     0.11  0.02366712 hidden_prev
study1$rds_pps_recap(study1_data)
#>                   estimator estimate       se     inquiry
#> 1 hidden_size_rds_pps_recap 92.53333 11.97572 hidden_size

Step 7. Diagnose study design

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 7)

set.seed(872312)
study1_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(study1, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(study1_simulations, "vignettes/study1_sim.rds")
study_diagnosands <-
 declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    mean_se = mean(se),
    bias = mean(estimate - estimand),
    rmse = sqrt(mean((estimate - estimand) ^ 2))
  )

study1_simulations <- readRDS(here::here("vignettes/study1_sim.rds"))

diagnose_design(simulations_df = study1_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis %>% select(-'Design') %>%
  kable()
Inquiry Estimator N Sims Mean Estimand Mean Estimate SD Estimate Mean Se Bias RMSE
hidden_prev hidden_prev_pps_ht 100 0.10 0.11 0.03 0.02 0.01 0.03
(0.00) (0.00) (0.00) (0.00) (0.00) (0.00)
hidden_prev hidden_prev_tls_ht 100 0.10 0.00 0.00 0.00 -0.09 0.09
(0.00) (0.00) (0.00) (0.00) (0.00) (0.00)
hidden_prev_in_known NA 100 0.09 NA NA NA NA NA
(0.00) NA NA NA NA NA
hidden_size hidden_size_pps_ht 100 95.17 106.50 28.15 21.77 11.33 27.10
(0.57) (2.75) (2.32) (0.27) (2.45) (2.18)
hidden_size hidden_size_pps_nsum 100 95.17 99.37 12.57 6.71 4.20 9.68
(0.57) (1.14) (1.04) (0.07) (0.76) (0.72)
hidden_size hidden_size_rds_chords 100 95.17 77.37 38.38 35.41 -17.80 43.65
(0.57) (3.95) (2.28) (0.81) (4.10) (1.92)
hidden_size hidden_size_rds_multi 100 95.17 91.42 13.31 23.72 -3.75 13.59
(0.57) (1.39) (1.17) (1.02) (1.39) (0.98)
hidden_size hidden_size_rds_pps_recap 100 95.17 101.07 19.64 15.32 5.90 17.13
(0.57) (1.90) (1.40) (0.60) (1.62) (1.28)
hidden_size hidden_size_rds_sspse 100 95.17 159.71 32.63 92.65 64.54 72.41
(0.57) (3.03) (4.67) (3.82) (3.04) (4.44)
hidden_size hidden_size_rds_tls_recap 100 95.17 98.38 11.34 10.23 3.21 11.04
(0.57) (1.23) (0.78) (0.29) (1.12) (0.83)
hidden_size hidden_size_tls_ht 100 95.17 3.43 1.15 0.90 -91.74 91.91
(0.57) (0.11) (0.07) (0.02) (0.53) (0.54)
hidden_size hidden_size_tls_nsum 100 95.17 98.73 14.26 7.60 3.56 11.03
(0.57) (1.53) (1.10) (0.31) (1.14) (0.86)
hidden_size hidden_size_tls_recap 100 95.17 33.72 15.29 13.19 -61.45 63.34
(0.57) (1.76) (3.00) (1.54) (1.70) (1.25)
hidden_size_in_known NA 100 27.35 NA NA NA NA NA
(0.34) NA NA NA NA NA
known_prev NA 100 0.30 NA NA NA NA NA
(0.00) NA NA NA NA NA
known_size NA 100 301.35 NA NA NA NA NA
(0.97) NA NA NA NA NA

Multi study declaration

Additional study

study_2 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 5000, K = 3, 
               prev_K = c(frame = .5, known = .2, hidden = .1), 
               rho_K = c(.05, .05, .05),
               p_edge_within = list(frame = c(0.05, 0.05), 
                                    known = c(0.1, 0.05), 
                                    hidden = c(0.2, 0.9)),
               p_edge_between = list(frame = 0.05, 
                                     known = 0.1, 
                                     hidden = 0.01),
               directed = FALSE),
        
        group_names = c("frame", "known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(frame = 1, known = 1, hidden = .6),
        
        # probability of service utilization in hidden population
        # for service multiplier
        
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.3)",
               "purrr::map_df(hidden, ~ sapply( `names<-`(runif(10, 0.05,.3), paste0('loc_', 1:10)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
               known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden",
                   n_seed = 100,
                   n_coupons = 3,
                   target_type = "waves",
                   target_n_rds = 2),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 4,
                   target_n_tls = 300,
                   cluster = paste0("loc_", 1:8)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = "frame",
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 800)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$|^frame$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse, 
                            label = "rds_sspse",
                            prior_mean = 450,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 5000,
                            rds_prefix = "rds"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             label = "rds_chords",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "frame"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:8),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(
            ht = list(handler = get_study_est_ht, 
                      label = "pps_ht"),
            nsum = list(handler = get_study_est_nsum,
                        known = c("frame", "known"),
                        hidden = "hidden_visible_out",
                        survey_design = ~ pps_cluster + strata(pps_strata),
                        n_boot = 100,
                        label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:8)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

study_3 <- 
  list(
    pop = 
      list(
        handler = get_study_population,
        
        # network structure setup
        network_handler = sim_block_network,
        network_handler_args = 
          list(N = 2000, K = 2, prev_K = c(known = .3, hidden = .1), rho_K = .3,
               p_edge_within = list(known = c(0.05, 0.1), hidden = c(0.05, 0.7)),
               p_edge_between = list(known = 0.05, hidden = 0.05),
               directed = FALSE),
        
        # groups
        group_names = c("known", "hidden"),
        
        # probability of visibility (show-up) for each group
        p_visible = list(known = 1, hidden = .7),
        
        # probability of service utilization in hidden population
        # for service multiplier
        add_groups = 
          list(service_use = "rbinom(n(), 1, 0.25 + hidden * 0.05)",
         "purrr::map_df(hidden, ~ sapply( `names<-`(c(.2, .1, .3, .2), paste0('loc_', 1:4)), function(add) rbinom(length(.x), 1, 0.1 + .x * add)))",
         known_2 = 0.1, known_3 = 0.2)
      ),
    sample = 
      list(
        rds = list(handler = sample_rds,
                   # RDS parameters
                   sampling_variable = "rds",
                   hidden_var = "hidden", # default
                   n_seed = 20,
                   n_coupons = 3,
                   target_type = "sample",
                   target_n_rds = 100),
        tls = list(handler = sample_tls,
                   sampling_variable = "tls",
                   # TLS sampling parameters
                   target_n_clusters = 3,
                   target_n_tls = 300,
                   cluster = paste0("loc_", 1:4)),
        pps = list(handler = sample_pps,
                   sampling_variable = "pps",
                   # prop sampling parameters
                   sampling_frame = NULL,
                   strata = NULL,
                   cluster = NULL,
                   target_n_pps = 400)
      ),
    inquiries = list(handler = get_study_estimands,
                     known_pattern = "^known$", 
                     hidden_var = "hidden"),
    estimators = 
      list(
        rds = 
          list(sspse = list(handler = get_study_est_sspse,
                            prior_mean = 200,
                            mcmc_params = list(interval = 5, burnin = 2000, samplesize = 500),
                            total = 2000,
                            rds_prefix = "rds", 
                            label = "rds_sspse"),
               chords = list(handler = get_study_est_chords, 
                             type = "mle",
                             seed_condition = "rds_from == -999",
                             n_boot = 100,
                             rds_prefix = "rds",
                             label = "rds_chords"),
               multiplier = list(handler = get_study_est_multiplier, 
                                 service_var = "service_use",
                                 seed_condition = "rds_from == -999",
                                 n_boot = 100,
                                 rds_prefix = "rds",
                                 label = "rds_multi")),
        tls =
          list(ht = list(handler = get_study_est_ht,
                         weight_var = "tls_weight",
                         prefix = "tls",
                         label = "tls_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ tls_cluster,
                           n_boot = 100,
                           prefix = "tls",
                           label = "tls_nsum"),
               recap = list(handler = get_study_est_recapture,
                            capture_vars = paste0("loc_", 1:4),
                            sample_condition = "tls == 1",
                            model = "Mt",
                            hidden_variable = "hidden",
                            label = "tls_recap")),
        pps = 
          list(ht = list(handler = get_study_est_ht,
                         prefix = "pps",
                         label = "pps_ht"),
               nsum = list(handler = get_study_est_nsum,
                           known = c("known", "known_2", "known_3"),
                           hidden = "hidden_visible_out",
                           survey_design = ~ pps_cluster + strata(pps_strata),
                           n_boot = 100,
                           prefix = "pps",
                           label = "pps_nsum")),
        all = 
          list(recap1 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", "pps"),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_pps_recap"),
               recap2 = list(handler = get_study_est_recapture,
                             capture_vars = c("rds", paste0("loc_", 1:4)),
                             model = "Mt",
                             hidden_variable = "hidden",
                             label = "rds_tls_recap"))
      )
  )

Declare multiple studies

multi_population <- 
  declare_population(handler = get_multi_populations, 
                     pops_args = list(study_1 = study_1$pop,
                                      study_2 = study_2$pop,
                                      study_3 = study_3$pop))

multi_sampling <- 
  declare_sampling(handler = get_multi_samples, 
                   samples_args = list(study_1 = study_1$sample,
                                       study_2 = study_2$sample,
                                       study_3 = study_3$sample)) 

multi_inquiry <- 
  declare_inquiry(handler = get_multi_estimands, 
                  inquiries_args = list(study_1 = study_1$inquiries,
                                        study_2 = study_2$inquiries,
                                        study_3 = study_3$inquiries)) 

multi_estimators <-
  declare_estimator(handler = get_multi_estimates,
                    estimators_args = list(study_1 = study_1$estimators,
                                           study_2 = study_2$estimators,
                                           study_3 = study_3$estimators))

multi_study <- multi_population + multi_sampling + multi_inquiry + multi_estimators

set.seed(872312)
draw_estimands(multi_study) %>% 
  kable()
inquiry estimand
study_1_known_size 318.0000000
study_1_hidden_size 100.0000000
study_1_known_prev 0.3180000
study_1_hidden_prev 0.1000000
study_1_hidden_size_in_known 38.0000000
study_1_hidden_prev_in_known 0.1194969
study_2_frame_size 2521.0000000
study_2_known_size 955.0000000
study_2_hidden_size 481.0000000
study_2_frame_prev 0.5042000
study_2_known_prev 0.1910000
study_2_hidden_prev 0.0962000
study_2_hidden_size_in_frame 269.0000000
study_2_hidden_prev_in_frame 0.1067037
study_2_hidden_size_in_known 114.0000000
study_2_hidden_prev_in_known 0.1193717
study_3_known_size 590.0000000
study_3_hidden_size 211.0000000
study_3_known_prev 0.2950000
study_3_hidden_prev 0.1055000
study_3_hidden_size_in_known 145.0000000
study_3_hidden_prev_in_known 0.2457627
draw_estimates(multi_study) %>% 
  kable()
estimator estimate se inquiry
hidden_size_rds_sspse 160.5000000 97.5311122 study_1_hidden_size
hidden_size_rds_chords 78.0000000 28.2386994 study_1_hidden_size
hidden_size_rds_multi 130.9090909 64.5431295 study_1_hidden_size
hidden_size_tls_ht 4.2110092 0.9546678 study_1_hidden_size
hidden_prev_tls_ht 0.0042110 0.0009547 study_1_hidden_prev
hidden_size_tls_nsum 106.4848460 3.8868799 study_1_hidden_size
hidden_size_tls_recap 65.8977969 28.2799918 study_1_hidden_size
hidden_size_pps_ht 80.0000000 19.0809789 study_1_hidden_size
hidden_prev_pps_ht 0.0800000 0.0190810 study_1_hidden_prev
hidden_size_pps_nsum 116.6671698 7.3704423 study_1_hidden_size
hidden_size_rds_pps_recap 78.7692308 9.1734484 study_1_hidden_size
hidden_size_rds_tls_recap 133.0891175 15.8443204 study_1_hidden_size
hidden_size_rds_sspse 445.5000000 513.5584734 study_2_hidden_size
hidden_size_rds_chords 362.0000000 118.3849243 study_2_hidden_size
hidden_size_rds_multi 555.2210526 35.5111343 study_2_hidden_size
hidden_size_tls_ht 49.3191882 22.7856632 study_2_hidden_size
hidden_prev_tls_ht 0.0098638 0.0045571 study_2_hidden_prev
hidden_size_tls_nsum 739.9761541 79.0004286 study_2_hidden_size
hidden_size_tls_recap 65.7249135 1.8443869 study_2_hidden_size
hidden_size_pps_ht 257.4800000 27.0490944 study_2_hidden_size
hidden_prev_pps_ht 0.0514960 0.0054098 study_2_hidden_prev
hidden_size_pps_nsum 207.6162547 26.6807344 study_2_hidden_size
hidden_size_rds_pps_recap 398.2972973 42.9184468 study_2_hidden_size
hidden_size_rds_tls_recap 551.2352545 6.3146870 study_2_hidden_size
hidden_size_rds_sspse 202.0000000 57.8507824 study_3_hidden_size
hidden_size_rds_chords 124.0000000 46.5259556 study_3_hidden_size
hidden_size_rds_multi 153.8461538 18.7141888 study_3_hidden_size
hidden_size_tls_ht 32.4611111 4.5375350 study_3_hidden_size
hidden_prev_tls_ht 0.0162306 0.0022688 study_3_hidden_prev
hidden_size_tls_nsum 440.2018810 28.4622269 study_3_hidden_size
hidden_size_tls_recap 66.9238085 4.1726843 study_3_hidden_size
hidden_size_pps_ht 195.0000000 28.5714286 study_3_hidden_size
hidden_prev_pps_ht 0.0975000 0.0142857 study_3_hidden_prev
hidden_size_pps_nsum 290.1977901 19.8818273 study_3_hidden_size
hidden_size_rds_pps_recap 201.0000000 27.9131306 study_3_hidden_size
hidden_size_rds_tls_recap 179.1636496 5.7694275 study_3_hidden_size

Diagnose multiple studies

# multi_simulations <- simulate_design(multi_study, sims = 2)

requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
multi_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(multi_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(multi_simulations, file = "vignettes/multi_sim.rds")
multi_simulations <- readRDS(here::here("vignettes/multi_sim.rds"))

diagnose_design(simulations_df = multi_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))

Meta-analysis structure

  1. Conduct multi-study design for as many sampling-estimator pairs in each study as possible, then diagnoise the multi-study design. Calculate average (across simulations) estimand and bias of sampling-estimator for each of the studies and estimator sampling strategies. These will serve as population we will be drawing population-sampling-estimator triads

  2. Estimands include:

    • Average estimand by inquiry label (within study)
    • Average bias of specific sampling-estimator pair (across studies) compared to truth
    • Average relative bias of sampling-estimator pair (across studies) compared to “gold standard”
    • Ratio of average bias to costs of sampling-estimator pair
  3. As mentioned before sampling will consist of drawing population-sampling-estimator triads from the population presuming that each study uses at least two sampling strategies at a time

  4. Once we draw sample we use Stan model to estimate study-specific estimands and sampling-estimator specific errors (biases)

  5. We have all parts to conduct the full cycle of meta-analyses

meta_population <- 
  declare_population(multi_design = multi_study, n_sim = 3, parallel = FALSE, 
                     handler = get_meta_population)

meta_inquiry <- 
  declare_inquiry(study_estimand = "hidden_size",
                  samp_est_benchmark = "pps_ht", 
                  handler = get_meta_estimands)

meta_sample <- 
  declare_sampling(sampling_variable = "meta",
                   selection_variables = c("sample", "estimator"),
                   samples_per_study = 2, estimator_per_sample = 3, 
                   force = list(sample = "pps", estimator = "ht"),
                   handler = get_meta_sample)

meta_estimator <- 
  declare_estimator(sampling_variable = "meta",
                    which_estimand = "hidden_size",
                    benchmark = list(sample = "pps", estimator = "ht"),
                    parallel = FALSE,
                    stan_handler = get_meta_stan,
                    handler = get_meta_estimates)

meta_study <- meta_population + meta_inquiry + meta_sample + meta_estimator

set.seed(872312)
draw_estimands(meta_study) %>% 
  kable()
inquiry estimand
bias_pps_ht -75.5751389
bias_pps_nsum -71.4793073
bias_rds_chords -109.5555556
bias_rds_multi 6.0433312
bias_rds_pps_recap -8.1517712
bias_rds_sspse 457.1111111
bias_rds_tls_recap 0.9378198
bias_sd_pps_ht 123.4264977
bias_sd_pps_nsum 214.8833125
bias_sd_rds_chords 165.3839397
bias_sd_rds_multi 25.8688849
bias_sd_rds_pps_recap 34.0001860
bias_sd_rds_sspse 710.6276847
bias_sd_rds_tls_recap 2.6262751
bias_sd_tls_ht 183.3685072
bias_sd_tls_nsum 124.4426607
bias_sd_tls_recap 195.7716216
bias_tls_ht -233.0425404
bias_tls_nsum 124.6250478
bias_tls_recap -203.3617179
rel_bias_pps_ht 1.0000000
rel_bias_pps_nsum 0.9368880
rel_bias_rds_chords 1.4782626
rel_bias_rds_multi -0.0568482
rel_bias_rds_pps_recap 0.1282246
rel_bias_rds_sspse -5.9293901
rel_bias_rds_tls_recap -0.0121496
rel_bias_sd_pps_ht 1.0000000
rel_bias_sd_pps_nsum 1.7537049
rel_bias_sd_rds_chords 1.3302692
rel_bias_sd_rds_multi 0.2008103
rel_bias_sd_rds_pps_recap 0.2707674
rel_bias_sd_rds_sspse 5.9693857
rel_bias_sd_rds_tls_recap 0.0207805
rel_bias_sd_tls_ht 1.5034308
rel_bias_sd_tls_nsum 1.0205011
rel_bias_sd_tls_recap 1.6034942
rel_bias_tls_ht 3.1280115
rel_bias_tls_nsum -1.6760795
rel_bias_tls_recap 2.7298581
study_1_hidden_size 94.6666667
study_2_hidden_size 490.3333333
study_3_hidden_size 209.3333333
draw_estimates(meta_study) %>% 
  kable()
#> Running /usr/lib/R/bin/R CMD SHLIB foo.c
#> gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG   -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/include/"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/unsupported"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/BH/include" -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/src/"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppParallel/include/"  -I"/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1      -fpic  -g -O2 -ffile-prefix-map=/build/r-base-kbcSEe/r-base-4.1.1=. -flto=auto -ffat-lto-objects -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g  -c foo.c -o foo.o
#> In file included from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Core:88,
#>                  from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Dense:1,
#>                  from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
#>                  from <command-line>:
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name ‘namespace’
#>   628 | namespace Eigen {
#>       | ^~~~~~~~~
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:17: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
#>   628 | namespace Eigen {
#>       |                 ^
#> In file included from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Dense:1,
#>                  from /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
#>                  from <command-line>:
#> /home/gerasy/R/x86_64-pc-linux-gnu-library/4.1/RcppEigen/include/Eigen/Core:96:10: fatal error: complex: No such file or directory
#>    96 | #include <complex>
#>       |          ^~~~~~~~~
#> compilation terminated.
#> make: *** [/usr/lib/R/etc/Makeconf:168: foo.o] Error 1
estimator estimate se inquiry
rel_bias_pps_ht_meta 1.0000000 0.0000000 rel_bias_pps_ht
rel_bias_rds_sspse_meta 9.0255787 0.9682940 rel_bias_rds_sspse
rel_bias_rds_multi_meta 2.0092177 0.5122053 rel_bias_rds_multi
rel_bias_pps_nsum_meta 1.2651668 0.1307401 rel_bias_pps_nsum
rel_bias_rds_pps_recap_meta 2.0172260 0.2839682 rel_bias_rds_pps_recap
rel_bias_rds_chords_meta 0.7451670 0.4735371 rel_bias_rds_chords
rel_bias_tls_ht_meta 0.2008796 0.0290836 rel_bias_tls_ht
rel_bias_tls_recap_meta 0.3716098 0.0425775 rel_bias_tls_recap
study_1_meta 68.6466563 15.1119178 study_1_hidden_size
study_2_meta 363.3712115 73.2594432 study_2_hidden_size
study_3_meta 308.6784268 63.8887280 study_3_hidden_size
requireNamespace(c("doParallel", "parallel"))
doParallel::registerDoParallel(cores = 20)

set.seed(872312)
meta_simulations <-
  plyr::llply(
    as.list(1:100),
    .fun = function(x) {
      base::suppressWarnings(
        DeclareDesign::simulate_design(meta_study, sims = 1) %>%
          dplyr::mutate(
            sim_ID = x
          )
      )
    },
    .parallel = TRUE
  ) %>%
  dplyr::bind_rows()

saveRDS(meta_simulations, file = "vignettes/meta_sim.rds")
meta_simulations <- readRDS(here::here("vignettes/meta_sim.rds"))

diagnose_design(simulations_df = meta_simulations, 
                diagnosands = study_diagnosands) %>% 
  reshape_diagnosis(digits = 2) %>%
  dplyr::select(-'Design') %>%
  dplyr::filter(`Mean Estimate` != "NA") %>% 
  DT::datatable(options = list(scrollX = TRUE, pageLength = 15))